
#Out of sample forecast based on Nel and Righarts (2008)

library(readstata13)
library(tidyverse)
library(fixest)
library(readxl)
library(broom)
library(ggthemes)
library(Zelig)
library(stargazer)
library(latex2exp)
library(dplyr)
library(ggplot2)
library(ROCit)
if(!require('data.table')) {
  install.packages('data.table')
  library('data.table')
}
directory = c("/Users/roniyadlin/Dropbox/Climate conflict")
nd_conflict <- read.dta13("/Users/roniyadlin/Dropbox/Climate conflict/data/clean/climate_conflict_panel_country.dta")
setwd("/Users/roniyadlin/Dropbox/Climate conflict")
#### Load in control variables data (obtained from OWID), and merge to panel
child_mortality <- read_csv("data/raw/controls/child-mortality-igme.csv")  %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 
polity <- read_csv("data/raw/controls/experience-with-democracy-polity.csv") %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 
gdp_pc <- read_csv("data/raw/controls/gdp-per-capita-maddison-2020.csv") %>%
  filter(Year>2009)   %>%
  filter( Year<2021) 
political_regime <- read_csv("data/raw/controls/political-regime-amb-row.csv") %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 
pop_growth <- read_csv("data/raw/controls/population-growth-rates.csv") %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 
population <- read_csv("data/raw/controls/population-since-1800.csv") %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 
brevpeace <- read_csv("data/raw/controls/brevity-of-peace.csv") %>%
  filter(Year>2009)   %>%
  filter(Year<2021) 

#put all data frames into list
df_list <- list(child_mortality, polity, gdp_pc, political_regime, pop_growth, population,brevpeace)      

#merge all data frames together

controls_df <- Reduce(function(x, y) merge(x, y, all=TRUE), df_list)  %>%
  filter_at(vars(Code), all_vars(!is.na(.))) %>%
  rename(iso3=Code, year=Year, infant_mr='Mortality rate, under-5 (per 1,000 live births)', 
         pop='Population (historical estimates)', gdp_pc='GDP per capita', regime_type='regime_amb_row_owid', 
         gdp_growth='Growth rate - Sex: all - Age: all - Variant: estimates')


## Merge
analysis_df <- full_join(controls_df, nd_conflict, by = c("iso3", "year"))

##make controls matrix
allND_pc<-analysis_df$anynatdis/analysis_df$pop
imr_t_1<-analysis_df$infant_mr
imr_sqr_t_1<-imr_t_1*imr_t_1
mixed<-analysis_df$regime_type
gdpgro<-analysis_df$gdp_growth
brevity<-analysis_df$brevpeace
controls<-cbind(allND_pc,imr_t_1,imr_sqr_t_1,mixed,
                gdpgro,brevity)
colnames(controls)<-c("allND_pc","imr_t_1","imr_sq_t_1",
                      "mixedx", "gdpgrox", "brevity")


##Run old regression for beta values:
rep_df <- read.dta13("/Users/roniyadlin/Dropbox/Climate conflict/replication materials/Nel and Righarts ND and VCC final 20 June 2007.dta")
rep_df <- rep_df %>%                           
  group_by(state) %>%
  mutate(imr_t_1 = lag(imr),
         imr_sq_t_1 = lag(imr_sq),
         vccall_tp1 = lead(vccall),
         vccmajor_tp1 = lead(vccmajor)) 
t1_2 <- zelig(vccall ~ allND_pc + imr_t_1 + imr_sq_t_1 + mixed + gdpgro + brevity, 
              model = "relogit", data = rep_df)
coefs2<-coefficients(t1_2)
coefs2<-as.data.frame(coefs2)
beta0<-data.frame(rep(coefs2[1,],times=nrow(controls)))

##Covariance matrix
V<-data.frame(vcov(t1_2))
rownames(coefs2)<-c("Intercept","allND_pc","imr_t_1","imr_sq_t_1",
                    "mixed", "gdpgro", "brevity")

##Forecasting
##Applying regression formula
forecast2<-beta0+controls%*%coefs2[2:7,]
colnames(forecast2)<-c("Civil_War")

##Calculating odds or war using logit equation
odds2<-exp(forecast2)
#max(na.omit(odds2))
#ggplot(odds2, aes(x=Civil_War)) + geom_histogram()

##Calculating probability of war using logit equation
prob2<-1/(1+exp(-forecast2))

##Making correction per Rare Events Logit (King/Zheng 2001)
Cmult<-as.matrix(t(coefs2)) %*% as.matrix(V) %*% as.matrix(coefs2)
C2<-(.5-prob2)*prob2*(1-prob2)*Cmult
prob2corrected<-prob2+C2
totevents<-data.frame(analysis_df$total_events)
totevents$War = ifelse(totevents > 0, 1, 0)
pred<-data.frame(analysis_df$Entity,analysis_df$year,prob2corrected,totevents$War,analysis_df$latitude,analysis_df$longitude)
colnames(pred)<-c("Entity","Year", "Civil_War_prob","Actual War","Latitude","Longitude")
write.csv(pred,"/Users/roniyadlin/Dropbox/Climate conflict/data/NRdata.csv", row.names = FALSE)



